*Generates a monthly series with info on monthly transitions
*Version 15 Stata

set more off 

*******************************************************************************
*NOTE: Overview of do-file

*Step 1: construct a series at Member-Month level from Pharmacy claims of antidepressants for members from the 10% random sample
*Step 2: clean up the member-month panel data: the final version of molecule/option is called "molecule_final" (line 146-153)
*Step 3: determine drug transitions within the treament period, which starts from 1 month prior to the first antidepressant, and ends three months after the last observed antidepressant prescription 
*Step 4: flag violations of prescribing guidelines

* --> Final dataset: "adults_sample_series_merged.dta" (needs to be incorporated into gen_monthly_data.do) 

*******************************************************************************


*Step 1: Generate a monthly time series of antidepressant prescriptions from
*	t0 = first month observed in BCBS data
********************************************************************************
*bring in pharmacy data (originally processed in gen_monthly_data.do)
use PHARM_adults_sample_ad.dta, clear

*merge in patient level information on demographics
preserve
import delimited member_adults_sample_covered.csv, varnames(1) clear
save member_adults_sample_covered.dta, replace
restore

merge m:1 memb_bid_str using member_adults_sample_covered.dta
keep if _merge==3

*generate a list of the patients
bysort memb_bid_str: gen unique=_n
keep if unique==1
keep memb_bid_str min_month_code
save adults_sample_series.dta, replace

*append this list of patients to itself another 44 times, i.e., for each member, there are 45 months in total from Jan 2013 to Sept 2016
local i=0
while `i'<=43{
	append using adults_sample_series.dta
	local i=`i'+1
	disp `i'
}

*generate month_code \in [1,45] where 1 is Jan 2013 and 45 is Sept 2016
bysort memb_bid_str: gen month_code=_n

*keep only those months after the a member's initial enrollment at BCBS
*NOTE: if enrolled before Jan 2013, min_month_code=0 and all 45 months are preserved
drop if month_code<min_month_code

*save resulting dataset
save adults_sample_series.dta, replace

*Step 2: Merge antidepressant claims into this coverage backbone 
********************************************************************************
*bring in backbone
use adults_sample_series.dta, clear
merge 1:m memb_bid_str month_code using PHARM_adults_sample_ad.dta
drop if _merge==2 
*drop any claims which occur outside of the coverage period

replace day_code=round(day_code*100)

*define molecules (34 active ingredients, including null)
sort memb_bid_str month_code day_code
bysort memb_bid_str: gen order=_n
tab substancename,sort
gen molecule=12
replace molecule=0 if _merge==1
replace molecule=1 if substancename=="SERTRALINE HYDROCHLORIDE"
replace molecule=2 if substancename=="CITALOPRAM HYDROBROMIDE"
replace molecule=3 if substancename=="FLUOXETINE HYDROCHLORIDE"
replace molecule=4 if inlist(substancename,"BUPROPION HYDROBROMIDE","BUPROPION HYDROCHLORIDE")
replace molecule=5 if substancename=="ESCITALOPRAM OXALATE"
replace molecule=6 if substancename=="TRAZODONE HYDROCHLORIDE"
replace molecule=7 if substancename=="VENLAFAXINE HYDROCHLORIDE"
replace molecule=8 if substancename=="DULOXETINE HYDROCHLORIDE"
replace molecule=9 if inlist(substancename,"PAROXETINE HYDROCHLORIDE ANHYDROUS","PAROXETINE HYDROCHLORIDE HEMIHYDRATE","PAROXETINE MESYLATE")
replace molecule=10 if substancename=="AMITRIPTYLINE HYDROCHLORIDE"
replace molecule=11 if substance=="MIRTAZAPINE"

*define cocktail (more than one molecule in the same month)
bysort memb_bid_str month_code: egen check_cocktail=sd(molecule)
count if check_cocktail!=0 & check_cocktail!=.

*"molecule2" has 14 options ranging from 0 to 13 (cocktail)
gen molecule2=molecule
replace molecule2=13 if check_cocktail!=0 & check_cocktail!=.
drop check_cocktail

*define first molecule each member prescribed in this sample period
bysort memb_bid_str: gen first_molecule=molecule2[1] 
tab first_molecule if order==1

*check whether we've assigned a single "molecule2" to each member month
bysort memb_bid_str month_code molecule2: gen check=_n
count if check==1
drop check

*day_code for months without scripts of antidepress -- 15th 
count if day_code==.
replace day_code=month_code*100+15 if day_code==.

*label variables
rename _merge merge_series
label var merge_series "3 if claim on antidepress"
label var order "order under each Member"
label var order2 "order under each Member-Month"
label var molecule "molecule based on substance plus no drug"
label var molecule2 "define cocktail if >1 molecule within each calendar month"
label var first_molecule "molecule2 at the first month in this series"

** define previous molecule
sort memb_bid_str month_code day_code
bysort memb_bid_str: gen prev_molecule=molecule2[_n-1] 
bysort memb_bid_str month_code: gen order2=_n 
replace prev_molecule=. if order2!=1

*define molecule prescribed 2 and 3 months previously
bysort memb_bid_str: gen prev_molecule2=molecule2[_n-2]
bysort memb_bid_str: gen prev_molecule3=molecule2[_n-3]

*stretch out prescriptions, i.e., try to assign a molecule if previous ones last longer than a month
bysort memb_bid_str: gen prev_length=days_supply_cnt[_n-1] // how long the script of antidepress from month (t-1) lasts 
bysort memb_bid_str: gen prev_length2=days_supply[_n-2] // how long the script of antidepress from month (t-2) lasts 
bysort memb_bid_str: gen prev_length3=days_supply[_n-3] // how long the script of antidepress from month (t-3) lasts 

*estimate gap in days since the last script in the previous month (t-1)
*NOTE: "day_code" defined as month_code*100+day of script e.g. Jan 15th in 2014 would have day code 13*100+15=1,315
bysort memb_bid_str: gen day_gap=day_code-day_code[_n-1]
replace day_gap=. if order2!=1 // not considering gap in days between scripts within the same calendar month
replace day_gap=. if molecule2!=0
sum day_gap // 18,731,557 obs range from 84 to 114 -- please see notes on "day_code"; adjust in the next line!
replace day_gap=day_gap-69 if day_gap!=. // now range from 15 to 45

*gap in days since the last script in month (t-2)
bysort memb_bid_str: gen day_gap2=day_code-day_code[_n-2]
replace day_gap2=. if order2!=1 
replace day_gap2=. if molecule2!=0 | prev_molecule!=0 // if so then no need to go back 2 months
sum day_gap2 // 16,437,969 obs ranging from 184 to 214
replace day_gap2=day_gap2-69*2 if day_gap2!=. // 46 to 76

*gap in days since the last script in month (t-3)
bysort memb_bid_str: gen day_gap3=day_code-day_code[_n-3]
replace day_gap3=. if order2!=1 
replace day_gap3=. if molecule2!=0 | prev_molecule!=0 | prev_molecule2!=0 
sum day_gap3 // 14,881,761 obs from 284 to 314
replace day_gap3=day_gap3-69*3 if day_gap3!=. // 77 to 107

*define final molecule 
gen molecule_final=molecule2
/* the three steps below try to assign a molecule for months without scripts if 
the last available script in the previous 3 months potentially lasts till now. */ 

* (1) go back 1 month: 1,759,688 changes made from 0 
* assign molecule from (t-1) to t if currently no drug and days since the last script in (t-1) <= length of the script + 10 days 
replace molecule_final=prev_molecule if molecule2==0 & inrange(day_gap,0,prev_length+10)

* (2) go back 2 months: 589,320 changes made
* assign molecule from (t-2) to t if no drug in months t and (t-1) and days since the last script in (t-2) <= length of the script + 10 days
replace molecule_final=prev_molecule2 if molecule2==0 & prev_molecule==0 & inrange(day_gap2,0,prev_length2+10)

* (3) go back 3 months: 215,125 changes made
* assign molecule from (t-3) to t if no drug in months t,(t-1) and (t-2) and days since the last script in (t-3) <= length of the script + 10 days
replace molecule_final=prev_molecule3 if molecule2==0 & prev_molecule==0 & prev_molecule2==0 & inrange(day_gap3,0,prev_length3+10)

count if molecule2==0 
count if molecule_final==0 

sort memb_bid_str month_code day_code
bysort memb_bid_str: gen prev_molecule_final=molecule_final[_n-1]
tab prev_molecule_final if order2==1

*parse variables
drop prev_molecule prev_molecule2 prev_molecule3 prev_length prev_length2 prev_length3 day_gap day_gap2 day_gap3

*save final file
save adults_sample_series_merged.dta, replace

*Step 3: Work on transition matrices: define treatment period for each member, merge in prescribers' info by NPI,
*	and run program for transitions: "drug_transition"
********************************************************************************
*bring in data just created
use adults_sample_series_merged.dta, clear
sort memb_bid_str month_code day_code

count if order==1
count if order2==1

*define treatment period by identifying when first antidepressant was prescribed
gen i_ad2=(molecule_final!=0)
bysort memb_bid_str: gen temp_sum_ad2=sum(i_ad2)
bysort memb_bid_str: gen sum_ad2=sum(temp_sum_ad2)

gen temp_month=month_code * (sum_ad2==1) 
bysort memb_bid_str: egen first_month=max(temp_month)

*parse variables
drop temp_month temp_sum_ad2 sum_ad2 

*define each member's depression treatment period from 1 month before the first antidepressant
count if month_code>=first_month-1
keep if month_code>=first_month-1 

sort memb_bid_str month_code day_code
bysort memb_bid_str: gen order_new=_n
count if order_new==1 
label var order_new "New order within each Member"

*end treatment period at three months after the latest script
bysort memb_bid_str: gen temp_sum_ad2=sum(i_ad2) 
bysort memb_bid_str: egen temp_max=max(temp_sum_ad2)

gen temp_month= month_code*(temp_max==temp_sum_ad2)
replace temp_month=0 if molecule_final==0 
bysort memb_bid_str: egen max_ad_month=max(temp_month) 

*trim off post-treatment period records
keep if month_code<=max_ad_month+3

*parse variables
drop temp_sum_ad2 temp_max temp_month

*hard code missing molecule if first antidepressant
replace prev_molecule_final=. if order_new==1 

*merge NPI data
sort memb_bid_str month_code day_code

*flag change of prescriber (by NPI):  
replace npi_prscrb_prov_id_cd="" if npi_prscrb_prov_id_cd=="9999999999"
bysort memb_bid_str: gen prev_NPI1=npi_prscrb_prov_id_cd[_n-1] 
bysort memb_bid_str: gen prev_NPI2=npi_prscrb_prov_id_cd[_n-2] 
bysort memb_bid_str: gen prev_NPI3=npi_prscrb_prov_id_cd[_n-3] 

*if prescriber in current month missing, try to find the most recent one in the past 3 months 
gen npi_str=npi_prscrb_prov_id_cd
replace npi_str=prev_NPI1 if npi_prscrb_prov_id_cd==""
replace npi_str=prev_NPI2 if npi_prscrb_prov_id_cd=="" & prev_NPI1==""
replace npi_str=prev_NPI3 if npi_prscrb_prov_id_cd=="" & prev_NPI1=="" & prev_NPI2==""

*assign npi_str in the month prior to the first script (same as the prescriber of the first script of antidepressant)
tab npi_str if molecule_final==0 & order_new==1
bysort memb_bid_str: gen next_NPI=npi_prscrb_prov_id_cd[_n+1]
replace npi_str=next_NPI if npi_prscrb_prov_id_cd=="" & order_new==1 & molecule_final==0

count if npi_prscrb_prov_id_cd!=""
count if npi_str!=""

*define previous prescriber
bysort memb_bid_str: gen prev_npi_str=npi_str[_n-1] 

*flag if changed prescriber in this month
gen change_prescriber=(npi_str!=prev_npi_str) 
replace change_prescriber=. if npi_str=="" | prev_npi_str==""

*merge in NPI information from NPPES
merge m:1 npi_str using npidata_cleaned_main.dta
drop if _merge==2
rename _merge merge_NPI

*sort the data
sort memb_bid_str month_code day_code
count
count if order2==1

*define transition program
program drop _all

program define drug_transition
syntax,specialty(int) change_prescriber(str)
   * specialty {0,1,2,3} - 0="all",0.5="physician",1="psych",2="GP",3="other"
   
   set more off
   capture log close 
   log using "transition_prov_special`specialty'_change_`change_prescriber'.log",replace

   preserve 
   
   keep if order2==1 
   
   tab molecule_final
   
	if `specialty'==-1{
		keep if i_phy==1
	}
	if `specialty'==1{
		keep if i_psych==1
	}
	if `specialty'==2{
		keep if i_GP==1
	}
	if `specialty'==3{
		keep if i_phy==1 & i_psych==0 & i_GP==0 
	}
   if "`change_prescriber'"=="same"{
	   keep if change_prescriber==0 
   }
   if "`change_prescriber'"=="diff"{
	   keep if change_prescriber==1 
   }
   
   tab molecule_final if prev_molecule_final==0,matcell(freq)
   if `=rowsof(freq)'<14{
	 disp "check log file!"
     matrix freq=freq\J(14-`=rowsof(freq)',1,0)
   }
   
   forvalues m=1/13{
		tab molecule_final if prev_molecule_final==`m',matcell(freq_append)
		
		if `=rowsof(freq_append)'<14{
		disp "check log file!"
		matrix freq_append=freq_append\J(14-`=rowsof(freq_append)',1,0)
		}
		matrix freq=(freq,freq_append)
   }
   
   putexcel set "transition_prov_special`specialty'_change_`change_prescriber'.csv",replace
   putexcel A1=matrix(freq)
   putexcel clear 
   
   restore 
   log close 

end


drug_transition, specialty(0) change_prescriber("all")
drug_transition, specialty(0) change_prescriber("same")
drug_transition, specialty(0) change_prescriber("diff")


drug_transition, specialty(-1) change_prescriber("all")
drug_transition, specialty(-1) change_prescriber("same")
drug_transition, specialty(-1) change_prescriber("diff")

drug_transition, specialty(1) change_prescriber("all")
drug_transition, specialty(1) change_prescriber("same")
drug_transition, specialty(1) change_prescriber("diff")


drug_transition, specialty(2) change_prescriber("all")
drug_transition, specialty(2) change_prescriber("same")
drug_transition, specialty(2) change_prescriber("diff")

drug_transition, specialty(3) change_prescriber("all")
drug_transition, specialty(3) change_prescriber("same")
drug_transition, specialty(3) change_prescriber("diff")

*Step 4: flag violations of prescribing guidelines
********************************************************************************

*bring in the labels of violations for 14*14 possible transitions 
use violate-guideline.dta, clear
gen any_violate0=(guide_UK+ guide_CA+ guide_US>0)
gen any_violate=(guide_UK+ guide_CA+ guide_US+ guide_cocktail>0)
label var any_violate "1 if violating any guideline"
label var any_violate0 "1 if violating UK/CA/US guidelines"
save violate-guideline2.dta, replace

*bring in transition data crated in Step 3
use adults_sample_series_merged.dta, clear
merge m:1 prev_molecule_final molecule_final using violate-guideline2.dta
rename _merge merge_guideline

* count no. violations 
foreach var of varlist guide* any_violate0 any_violate{
	disp "`var'"
	count if `var'==1 & i_phy!=. & order2==1
	count if `var'==1 & i_phy==1 & order2==1
	count if `var'==1 & i_psych==1 & order2==1
	count if `var'==1 & i_GP==1 & order2==1
	count if `var'==1 & i_phy==1 & i_GP==0 & i_psych==0 & order2==1
}

keep if order2==1

*save resulting file (This is the file that will need to be used in gen_monthly_data.do.)
save adults_sample_series_merged.dta, replace